source('00_util_scripts/mod_bplot.R')
source('00_util_scripts/mod_bulk.R')

# Location of the gzipped estimated-counts table published as a supplementary
# file of GEO series GSE281600.
diabete_mf_bulk <- 'https://ftp.ncbi.nlm.nih.gov/geo/series/GSE281nnn/GSE281600/suppl/GSE281600%5FP27955%5Festimated%5Fcounts.txt.gz'

# Read the table directly from the URL (no delimiter is passed explicitly).
data <- diabete_mf_bulk |>
  read_delim()

# Show the freshly imported table.
data

# Sample annotation for GSE281600: fetch the series object, take its phenotype
# table and keep only the four columns used further down.
# NOTE(review): pluck_geo() comes from the sourced utility scripts; it is
# presumably returning an object that pData() understands - confirm.
meta <- pluck_geo('GSE281600') |>
  pData() |>
  as_tibble() |>
  select(description, cell.type.ch1, status.ch1, tissue.ch1)

# Show the trimmed annotation table.
meta

# Prepare gene identifiers and attach gene symbols.
#   1. Drop any pre-existing `transcript` / `ref_genome` columns (no error if
#      they are absent).
#   2. Build `geneid` from the first, unnamed column (`...1`) by removing the
#      first '.' and everything after it.
#   3. Hand `geneid` to ensembl_to_symbol().
# NOTE(review): ensembl_to_symbol() is defined in the sourced utility scripts;
# later steps rely on it producing a `transcript` column - confirm.
data <- data |>
  select(-any_of(c('transcript', 'ref_genome'))) |>
  mutate(geneid = str_remove(...1, '\\..+')) |>
  ensembl_to_symbol(geneid)

# Quick structural overview of the annotated table.
glimpse(data)

# Long-format count table: every column whose name contains 'P27955' is
# stacked into `name` (original column name) / `value` (its entry), keyed by
# `transcript`. Resulting columns: transcript, name, value.
# contains() is already a selection helper, so no all_of() wrapper is needed.
tdb <- data |>
  select(contains('P27955'), transcript) |>
  pivot_longer(-transcript)

# TRPM2 rows only. Derived from `tdb` rather than repeating the select/pivot
# pipeline; filtering after the pivot yields the same rows in the same order
# as filtering `data` first.
m2_count <- tdb |>
  filter(transcript == 'TRPM2')

# Per-sample TRPM2 abundance in counts per million, with sample annotation.
tdb_m2 <- tdb |>
  # Total of `value` over all rows of each sample column.
  summarise(total_count = sum(value), .by = name) |>
  # Attach the TRPM2 rows; the key is spelled out so the join neither prints
  # a "Joining with ..." message nor silently changes if another shared
  # column ever appears.
  inner_join(m2_count, join_by(name)) |>
  # Scale the TRPM2 value to counts per million of the sample total.
  mutate(cpm = 1e6 * value / total_count) |>
  # Sample column names are matched against `description` in the metadata.
  inner_join(meta, join_by(name == description))

# Box plot of log1p(CPM) of TRPM2 for the ND and T2D groups, restricted to
# samples whose cell type mentions 'CD206'; one panel per
# tissue / cell-type combination.
tdb_m2 |>
  # Group label = status text with everything from the first '(' removed.
  mutate(group = str_remove(status.ch1, '\\(.+')) |>
  filter(str_detect(cell.type.ch1, 'CD206'),
         group %in% c('ND', 'T2D')) |>
  ggplot(aes(group, log1p(cpm), color = group)) +
  # Outlier markers are suppressed because every point is drawn by the jitter
  # layer below. FALSE is spelled out: `F` is an ordinary, reassignable variable.
  geom_boxplot(outliers = FALSE) +
  # Horizontal jitter only, so the plotted y values stay exact.
  geom_jitter(height = 0, width = .1) +
  facet_wrap(~tissue.ch1 + cell.type.ch1) +
  theme_bw() +
  labs(title = 'TRPM2 expression in pancreatic macrophages',
       subtitle = 'GSE281600')

# tidybulk object for the ND vs T2D comparison in samples whose cell type
# mentions 'CD206'.
tdb_t2d <- tdb |>
  # Bring in the sample annotation (sample column name == `description`).
  inner_join(meta, join_by(name == description)) |>
  # Group label = status text with everything from the first '(' removed.
  mutate(group = str_remove(status.ch1, '\\(.+')) |>
  # Keep the two groups of interest, then the CD206 cell types.
  filter(group %in% c('ND','T2D')) |>
  filter(str_detect(cell.type.ch1, 'CD206')) |>
  tidybulk(.sample = name, .transcript = transcript, .abundance = value) |>
  # NOTE(review): quick_process_bulk() is defined in the sourced utility
  # scripts; its exact processing steps are not visible here.
  quick_process_bulk(group = group)

# Differential-abundance test (T2D minus ND) on a subset of the samples,
# reporting only the TRPM2 row.
tdb_t2d |>
  # Drop rows without a transcript label and keep samples whose tissue
  # contains 'Exo' and whose cell type contains a '-'.
  # NOTE(review): presumably this selects the CD206-negative cells of the
  # exocrine tissue - confirm against the metadata values.
  filter(!is.na(transcript),
         str_detect(tissue.ch1, 'Exo'), str_detect(cell.type.ch1, '-')) |>
  # No-intercept design with a single explicit contrast. TRUE is spelled out:
  # `T` is an ordinary, reassignable variable.
  test_differential_abundance(~ 0+group, omit_contrast_in_colnames = TRUE,
                              contrasts = 'groupT2D-groupND') |>
  # One row per transcript, then pick out TRPM2.
  pivot_transcript() |>
  filter(transcript == 'TRPM2')
